In [1]:
# Required Packages
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import pandas_datareader.data as pdr
import math

from datetime import datetime
from datetime import timedelta
import matplotlib.dates as mdates

# Progress Bar
from IPython.core.display import Image, display
import progressbar

# Plots
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

# sns setting
sns.set_context("paper", rc={"font.size":12,"axes.titlesize":14,"axes.labelsize":12})

# plt setting
sns.set_style('whitegrid')
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12
mpl.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

The Market Analysis for Communication Services Sector

In this article, the Communication Services Sector from Yahoo! Finance is used, and we analyze the current top tech companies' stock prices.

Table of contentens

Symbol Name Symbol Name Symbol Name
AAPL Apple Inc. NVDA NVIDIA Corporation INTU Intuit Inc.
MSFT Microsoft Corporation ACN Accenture plc VMW VMware, Inc.
TSM Taiwan Semiconductor Manufacturing Company Limited AVGO Broadcom Inc. AMAT Applied Materials, Inc.
INTC Intel Corporation IBM International Business Machines Corporation MU Micron Technology, Inc.
CSCO Cisco Systems, Inc. ASML ASML Holding N.V. NOW ServiceNow, Inc.
ORCL Oracle Corporation TXN Texas Instruments Incorporated UBER Uber Technologies, Inc.
SAP SAP SE QCOM QUALCOMM Incorporated AMD Advanced Micro Devices, Inc.
ADBE Adobe Inc. FIS Fidelity National Information Services, Inc.
CRM salesforce.com, inc. SNE Sony Corporation
In [2]:
Stock_list = ['AAPL','MSFT','TSM','INTC','CSCO','ORCL','SAP','ADBE','CRM',
             'NVDA','ACN','AVGO','IBM','ASML','TXN','QCOM','FIS','SNE','INTU','VMW','AMAT','MU','NOW','UBER','AMD']
Stock_Dic = {'AAPL':'Apple Inc.','MSFT':'Microsoft Corporation','TSM':'Taiwan Semiconductor Manufacturing Company Limited',
            'INTC':'Intel Corporation','CSCO':'Cisco Systems, Inc.','ORCL':'Oracle Corporation',
            'SAP':'SAP SE','ADBE':'Adobe Inc.','CRM':'salesforce.com, inc.',
            'NVDA':'NVIDIA Corporation','ACN':'Accenture plc','AVGO':'Broadcom Inc.',
            'IBM':'International Business Machines Corporation','ASML':'ASML Holding N.V.',
            'TXN':'Texas Instruments Incorporated','QCOM':'QUALCOMM Incorporated',
            'FIS':'Fidelity National Information Services, Inc.','SNE':'Sony Corporation',
            'INTU':'Intuit Inc.','VMW':'VMware, Inc.','AMAT':'Applied Materials, Inc.',
            'MU':'Micron Technology, Inc.','NOW':'ServiceNow, Inc.',
            'UBER':'Uber Technologies, Inc.','AMD':'Advanced Micro Devices, Inc.'}

The data is collected from The beginning of the five years ago until now.

In [3]:
start, end = [datetime(datetime.today().year-5, 1, 1), datetime.today()]
# start, end = [datetime(datetime.today().year-5,datetime.today().month,datetime.today().day), datetime.today()]
In [4]:
def Timeline_plot(start, end, width = 16):
    fig, ax = plt.subplots(figsize=(width, 1))
    Temp = pd.date_range(start, datetime(end.year, end.month, 1), freq='MS')
    ax.plot((start, end), (0, 0), 'k', alpha=.5)
    ax.get_xaxis().set_major_locator(mdates.MonthLocator(interval=2))
    ax.get_xaxis().set_major_formatter(mdates.DateFormatter("%b %Y"))
    #fig.autofmt_xdate()
    _ = plt.setp((ax.get_yticklabels() + ax.get_yticklines() + list(ax.spines.values())), visible=False)
    for i in Temp:
        ax.scatter(i, 0, s=30, facecolor='#e74c3c', edgecolor='k')
    ax.grid(False)
    _  = plt.xticks(rotation=90)
    Temp1 = Temp.min().replace(day=1) - timedelta(days=1)
    Temp2 = Temp.max().replace(day=1) + timedelta(days=31)
    _ = ax.set_xlim ([datetime(Temp1.year,Temp1.month, 1), datetime(Temp2.year,Temp2.month, 1)]) 
In [5]:
Timeline_plot(start, end)

Collecting data from Yahoo Finance!, and creating moving averages for 10, 20 and 60 day periods of time.

In [6]:
def Get_Data(Inp):
    Days = [10, 20, 60]
    Out = pdr.DataReader(Inp, 'yahoo', start, end)
    Out.insert(0, 'Symbol', Inp) 
    for j in Days:
        column_name = "Moving Ave. %s days" % (str(j))
        Out[column_name] = Out['Adj Close'].rolling(window=j, center=False).mean()
    return Out
In [7]:
Data = Get_Data(Stock_list[0])

Counter = 0
Progress_Bar = progressbar.ProgressBar(maxval=len(Stock_list),
                                       widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
Progress_Bar.start()
for i in Stock_list[1:]:
    Counter+=1
    Progress_Bar.update(Counter)
    Temp = Get_Data(i)
    Data = pd.concat([Data, Temp])
    del Temp
Progress_Bar.finish()
|#########################################################################|100%

Displaying today's data only:

In [8]:
Today = Data[Data.index == Data.index[-1]].reset_index(drop = True)
Today.style.hide_index()
Out[8]:
Symbol High Low Open Close Volume Adj Close Moving Ave. 10 days Moving Ave. 20 days Moving Ave. 60 days
AAPL 267.99 265.9 266.6 267.25 1.05847e+07 267.25 265.188 262.367 239.388
MSFT 152.29 151.285 152.1 151.38 1.12704e+07 151.38 150.482 147.772 141.458
TSM 53.16 52.8073 53.06 53.09 3.16326e+06 53.09 53.411 53.175 49.337
INTC 58.295 57.79 58.15 58.05 8.00674e+06 58.05 58.256 57.973 53.8293
CSCO 45.54 45.15 45.24 45.31 9.34095e+06 45.31 45.179 46.471 47.3677
ORCL 56.4358 56.09 56.33 56.17 4.11237e+06 56.17 56.359 56.16 54.9179
SAP 136.26 135.39 135.46 135.95 218139 135.95 135.468 135.194 126.804
ADBE 310 308 308.52 309.53 1.13936e+06 309.53 302.523 295.526 281.903
CRM 163.4 161.39 161.61 162.97 2.05177e+06 162.97 162.856 161.694 154.02
NVDA 218.14 215.78 217 216.74 3.47434e+06 216.74 212.865 210.486 193.702
ACN 201.655 200.44 201.24 201.24 738901 201.24 198.609 194.463 190.34
AVGO 317.79 315 316.85 316.21 903755 316.21 315.406 312.946 293.592
IBM 134.51 133.04 133.6 134.48 1.66962e+06 134.48 134.392 135.111 137.417
ASML 273.43 270.53 273.43 270.62 266328 270.62 270.683 269.944 256.938
TXN 121.635 119.99 121.15 120.21 3.42424e+06 120.21 118.315 118.775 123.58
QCOM 85.1 83.43 84.51 83.55 3.81753e+06 83.55 86.314 87.4835 80.9764
FIS 138.65 137.45 138.27 138.08 566693 138.08 137.085 134.821 132.537
SNE 63.59 63.2165 63.26 63.48 925484 63.48 62.275 61.8365 59.9648
INTU 262.42 258.34 261.34 258.89 687632 258.89 265.514 261.65 264.003
VMW 160.84 155.5 160.27 155.58 1.21798e+06 155.58 166.009 165.396 157.008
AMAT 58.295 57.725 58.09 57.9 2.85726e+06 57.9 58.8781 57.4402 53.5272
MU 47.95 47.44 47.94 47.51 7.17038e+06 47.51 46.901 47.2595 46.9082
NOW 283.49 280.134 280.97 283 634744 283 276.9 262.462 257.49
UBER 29.85 29.25 29.41 29.61 8.03889e+06 29.61 28.538 28.186 30.6527
AMD 39.5101 39.02 39.1 39.15 1.7242e+07 39.15 39.672 38.072 32.9773
In [9]:
print('Currently, the stock with the highest volume is %s' %
      Stock_Dic[Today[Today.Volume == Today.Volume.max()].Symbol.values[0]])
Currently, the stock with the highest volume is Advanced Micro Devices, Inc.

Consider AMD for example. We have,

In [10]:
def Disp_Data(Inp, df = Data):
    Out = df[df.Symbol == Inp].drop(columns=['Symbol'])
    return Out
In [11]:
Temp = Disp_Data('AMD')
Temp.describe()
Out[11]:
High Low Open Close Volume Adj Close Moving Ave. 10 days Moving Ave. 20 days Moving Ave. 60 days
count 1489.000000 1489.000000 1489.000000 1489.000000 1.489000e+03 1489.000000 1480.000000 1470.000000 1430.000000
mean 11.611592 11.118999 11.367898 11.368207 4.732721e+07 11.368207 11.304743 11.236723 11.036263
std 9.939936 9.532397 9.740708 9.741012 4.053446e+07 9.741012 9.613383 9.481477 9.108275
min 1.690000 1.610000 1.620000 1.620000 0.000000e+00 1.620000 1.728000 1.787000 1.831667
25% 3.120000 3.010000 3.060000 3.060000 1.585970e+07 3.060000 3.047000 2.995875 2.897917
50% 10.030000 9.510000 9.750000 9.710000 3.788630e+07 9.710000 9.812500 9.984250 9.624500
75% 15.490000 14.740000 15.130000 15.000000 6.442610e+07 15.000000 14.966500 14.472500 13.492167
max 41.790001 40.070000 40.959999 41.290001 3.250584e+08 41.290001 39.672001 38.072001 32.977333
In [12]:
Temp.head().dropna(axis = 1)
Out[12]:
High Low Open Close Volume Adj Close
Date
2014-01-02 3.98 3.84 3.85 3.95 20548400.0 3.95
2014-01-03 4.00 3.88 3.98 4.00 22887200.0 4.00
2014-01-06 4.18 3.99 4.01 4.13 42398300.0 4.13
2014-01-07 4.25 4.11 4.19 4.18 42932100.0 4.18
2014-01-08 4.26 4.14 4.23 4.18 30678700.0 4.18
In [13]:
Temp.tail()
Out[13]:
High Low Open Close Volume Adj Close Moving Ave. 10 days Moving Ave. 20 days Moving Ave. 60 days
Date
2019-11-22 39.889999 38.189999 39.360001 39.150002 56931900.0 39.150002 38.827000 36.894000 32.435000
2019-11-25 40.169998 39.490002 39.500000 39.790001 45769500.0 39.790001 39.175000 37.199000 32.574000
2019-11-26 39.480000 38.810001 39.299999 38.990002 43603300.0 38.990002 39.403001 37.497001 32.708833
2019-11-27 39.759998 39.070000 39.459999 39.410000 33630100.0 39.410000 39.592001 37.811000 32.849833
2019-11-29 39.510101 39.020000 39.099998 39.150002 17242025.0 39.150002 39.672001 38.072001 32.977333

Stock Prices Fluctuations

Let's plot the stocks adjusted the closing price for all stock data under study.

In [14]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 12))
for i in range(len(Stock_list)):
    Data[Data.Symbol == Stock_list[i]]['Adj Close'].plot(ax=ax, label=Stock_Dic[Stock_list[i]])
_ = ax.legend(loc='upper center', bbox_to_anchor=(1.2, 0.9), shadow=True, ncol=1, fontsize=12)
_, Temp = ax.get_ylim()
_ = ax.set_ylim([0, 350])

We can create some new functions that can be beneficial for applications as well.

In [15]:
def List_Search(Inp, key):
    # Inp: A list
    Out=list()
    for i in range(len(Inp)):
        if Inp[i].find(key) != -1:
            Out.append(Inp[i])
    return Out

def List_Diff(Inp_A, Inp_B):
    # Inp_A: A list
    # Inp_B: A list
    Out=list(set(Inp_A)-set(Inp_B))
    return Out
In [16]:
# Creating a new list of Columns
Columns = List_Diff(Data.columns.tolist(), List_Search(Data.columns.tolist(), 'Moving Ave'))
Columns = List_Diff(Columns, ['Symbol'])
Temp = ['Ave ' + i for i in Columns]

# A new DataFrame
Ave_df = pd.DataFrame({'Symbol':Stock_list})
for i in Temp:
    Ave_df[i]=''
del Temp
# Progress Bar    
Counter = 0
Progress_Bar = progressbar.ProgressBar(maxval=len(Stock_list),
                                       widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
Progress_Bar.start()
for i in range(len(Stock_list)):
    Counter+=1
    Progress_Bar.update(Counter)
    Ave_df.iloc[i,1:] = Data[Data.Symbol == Stock_list[i]][Columns].mean().values
               
Progress_Bar.finish()
|#########################################################################|100%

The following table shows the average values for all columns of the Data.

In [17]:
Ave_df.style.hide_index()
Out[17]:
Symbol Ave Adj Close Ave Close Ave Low Ave Volume Ave Open Ave High
AAPL 136.593 142.226 140.946 4.06167e+07 142.168 143.415
MSFT 70.3581 73.5001 72.8403 2.99414e+07 73.4885 74.0799
TSM 28.6002 31.3021 31.0451 8.66303e+06 31.2921 31.5347
INTC 36.1227 38.7382 38.3478 2.67566e+07 38.71 39.0887
CSCO 32.1545 34.6427 34.349 2.51033e+07 34.6353 34.9258
ORCL 42.7222 44.6273 44.2796 1.48216e+07 44.6073 44.9709
SAP 90.4497 93.9282 93.3622 935715 93.8911 94.4232
ADBE 148.282 148.282 146.627 2.90723e+06 148.241 149.713
CRM 95.7469 95.7469 94.6101 5.21413e+06 95.7547 96.7841
NVDA 106.206 107.053 105.381 1.14092e+07 107.077 108.621
ACN 120.323 125.9 124.928 2.35483e+06 125.813 126.737
AVGO 173.849 184.698 182.379 3.06316e+06 184.669 186.803
IBM 135.342 154.739 153.663 4.41797e+06 154.737 155.815
ASML 135.502 139.317 138.153 912492 139.305 140.402
TXN 72.9498 77.7456 76.996 5.62105e+06 77.7146 78.4317
QCOM 57.8077 64.0875 63.4598 1.17657e+07 64.0715 64.7108
FIS 80.9753 83.5094 82.8449 1.86626e+06 83.4757 84.0904
SNE 35.1248 35.5356 35.2843 1.46227e+06 35.5334 35.7709
INTU 140.688 143.645 142.213 1.55418e+06 143.563 144.868
VMW 93.1561 106.827 105.559 1.83051e+06 106.792 108.083
AMAT 32.0171 33.342 32.9237 1.27669e+07 33.3394 33.7464
MU 31.0261 31.0261 30.5141 3.03698e+07 31.0468 31.5429
NOW 121.587 121.587 119.739 1.78679e+06 121.545 123.265
UBER 36.4539 36.4539 35.7738 1.72328e+07 36.5642 37.1745
AMD 11.3682 11.3682 11.119 4.73272e+07 11.3679 11.6116
In [18]:
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(18, 14), sharex=False)
# Upper Plot
Ave_df.plot.bar(x='Symbol', y='Ave Volume', rot=90,ax=ax[0], legend=False ,color='#34495e', edgecolor='k')
# Lower Plot
Temp = Ave_df.drop(columns=['Ave Volume'])
Temp.plot.bar(x='Symbol', rot=90,ax=ax[1], legend=True, edgecolor='k')
_ = ax[1].legend(loc='upper center', bbox_to_anchor=(0.5, -0.2), shadow=True, ncol=len(Temp.columns)-1, fontsize=12)
# Plot setting
plt.subplots_adjust(hspace=0.3,wspace=0.2)

The Moving Average of the Stocks

Let's plot moving averages for 10, 20 and 60 day periods of time for the top 4 companies with the highest volume on average.

In [19]:
def TopN_volumes(N, df = Ave_df):
    Out = df.sort_values(by='Ave Volume', ascending=False).iloc[:N,0].tolist()
    return Out 

# Conisder the Moving Ave and Adj Close columns
Columns = List_Search(Data.columns.tolist(), 'Moving Ave')
Columns.append("Adj Close")
Columns = list(np.sort(Columns))
# A list of top N = 4 companies with the hightest volume on average.
N = 4
mylist = TopN_volumes(N)
In [20]:
# Conisder the Moving Ave and Adj Close columns
Columns = List_Search(Data.columns.tolist(), 'Moving Ave')
Columns.append("Adj Close")
Columns = list(np.sort(Columns))
# A list of top N = 4 companies with the hightest volume on average.
N = 4
mylist = Ave_df.sort_values(by='Ave Volume', ascending=False).iloc[:N,0].tolist()
In [21]:
fig, ax = plt.subplots(nrows = math.ceil(N/2), ncols = 2, figsize=(16, 6*math.ceil(N/2)))

Counter = 0
for i in range(math.ceil(N/2)):
    for j in range(2):
        Disp_Data(mylist[Counter])[Columns].plot(ax=ax[i,j], legend = True)
        _ = ax[i,j].set_title(Stock_Dic[mylist[Counter]])
        _ = ax[i,j].legend(loc='upper center', bbox_to_anchor=(0.5, -0.25), shadow=True, ncol=2, fontsize=12)
        Counter += 1
plt.subplots_adjust(hspace=0.6, wspace=0.2)

The Daily Return Average of a Stock

Daily return can be calculated using the percentage change of the adjusted closing price.

In [22]:
fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 10))
for i in range(len(mylist)):
    Disp_Data(mylist[i])['Adj Close'].pct_change().plot(ax=ax, label=Stock_Dic[mylist[i]])
_ = ax.legend(loc='upper center', bbox_to_anchor=(1.15, 0.9), shadow=True, ncol=1, fontsize=12)
Temp = ax.get_ylim()
_ = ax.set_ylim([np.float(format(Temp[0], '.0e')) ,np.float(format(Temp[1], '.0e'))])
_ = ax.set_title('Daily Return', fontsize=14)

The Correlation Between Daily Returns of Different Stocks

First, we need to create a new data frame by reading the Adj Close column from all stock data under study. We have,

In [23]:
All_data = pdr.DataReader(Stock_list, 'yahoo', start, end)['Adj Close']
In [24]:
All_data.head()
Out[24]:
Symbols AAPL MSFT TSM INTC CSCO ORCL SAP ADBE CRM NVDA ... QCOM FIS SNE INTU VMW AMAT MU NOW UBER AMD
Date
2014-01-02 71.107201 32.471554 14.074995 21.742401 18.357819 34.576424 76.402283 59.290001 54.860001 15.021359 ... 60.438515 48.849117 16.630365 71.442017 74.349762 15.841674 21.660000 55.689999 NaN 3.95
2014-01-03 69.545288 32.253098 13.910857 21.733967 18.341125 34.484753 75.880394 59.160000 55.119999 14.841407 ... 60.084064 48.987373 16.649748 71.310333 74.283302 15.805568 20.969999 55.180000 NaN 4.00
2014-01-06 69.924515 31.571505 13.869821 21.464188 18.366167 34.347263 75.961388 58.119999 54.230000 15.040303 ... 59.927441 48.710854 16.766045 71.253891 73.992546 15.606986 20.670000 55.730000 NaN 4.13
2014-01-07 69.424438 31.816179 13.878029 21.573792 18.616493 34.695580 75.745430 58.970001 54.950001 15.286549 ... 60.372574 48.803028 16.785425 72.495697 78.677834 15.679197 21.730000 57.779999 NaN 4.18
2014-01-08 69.864105 31.248198 14.025754 21.438896 18.599806 34.576427 76.888168 58.900002 56.939999 15.494923 ... 60.735283 48.849117 17.686722 72.090187 77.631119 15.724328 23.870001 57.889999 NaN 4.18

5 rows × 25 columns

In [25]:
All_data.tail()
Out[25]:
Symbols AAPL MSFT TSM INTC CSCO ORCL SAP ADBE CRM NVDA ... QCOM FIS SNE INTU VMW AMAT MU NOW UBER AMD
Date
2019-11-22 261.779999 149.589996 52.790001 57.610001 44.849998 56.389999 135.410004 299.299988 162.809998 210.734512 ... 84.889999 135.460007 61.200001 259.809998 165.860001 55.939999 45.869999 279.980011 29.559999 39.150002
2019-11-25 266.369995 151.229996 53.759998 58.810001 45.450001 56.560001 135.740005 305.279999 161.710007 221.046906 ... 85.290001 136.880005 61.799999 257.070007 168.729996 58.279999 47.520000 279.750000 29.110001 39.790001
2019-11-26 264.290009 152.029999 53.630001 58.900002 45.310001 56.509998 135.339996 307.899994 162.539993 216.839996 ... 84.629997 137.539993 63.150002 265.880005 164.899994 57.570000 46.720001 278.859985 29.530001 38.990002
2019-11-27 267.839996 152.320007 53.889999 58.509998 45.240002 56.610001 135.410004 309.059998 161.509995 218.240005 ... 84.800003 138.429993 63.720001 262.149994 161.149994 58.290001 48.160000 281.279999 29.490000 39.410000
2019-11-29 267.250000 151.380005 53.090000 58.049999 45.310001 56.169998 135.949997 309.529999 162.970001 216.740005 ... 83.550003 138.080002 63.480000 258.890015 155.580002 57.900002 47.509998 283.000000 29.610001 39.150002

5 rows × 25 columns

The returns can be analyzed using the percentage change from the adj Close.

In [26]:
All_returns = All_data.pct_change()
In [27]:
All_returns.tail()
Out[27]:
Symbols AAPL MSFT TSM INTC CSCO ORCL SAP ADBE CRM NVDA ... QCOM FIS SNE INTU VMW AMAT MU NOW UBER AMD
Date
2019-11-22 -0.000878 0.000736 -0.003586 -0.010478 0.000223 0.002845 -0.000148 0.001673 -0.000430 0.003378 ... -0.002585 -0.007401 -0.005202 -0.041822 0.000966 -0.008859 0.007025 0.003081 0.003394 -0.009362
2019-11-25 0.017534 0.010963 0.018375 0.020830 0.013378 0.003015 0.002437 0.019980 -0.006756 0.048935 ... 0.004712 0.010483 0.009804 -0.010546 0.017304 0.041831 0.035971 -0.000822 -0.015223 0.016347
2019-11-26 -0.007809 0.005290 -0.002418 0.001530 -0.003080 -0.000884 -0.002947 0.008582 0.005133 -0.019032 ... -0.007738 0.004822 0.021845 0.034271 -0.022699 -0.012183 -0.016835 -0.003181 0.014428 -0.020106
2019-11-27 0.013432 0.001908 0.004848 -0.006621 -0.001545 0.001770 0.000517 0.003767 -0.006337 0.006456 ... 0.002009 0.006471 0.009026 -0.014029 -0.022741 0.012507 0.030822 0.008678 -0.001355 0.010772
2019-11-29 -0.002203 -0.006171 -0.014845 -0.007862 0.001547 -0.007773 0.003988 0.001521 0.009040 -0.006873 ... -0.014741 -0.002528 -0.003767 -0.012436 -0.034564 -0.006691 -0.013497 0.006115 0.004069 -0.006597

5 rows × 25 columns

In [28]:
fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 10))
All_returns.plot(marker='*', legend=True, ax=ax)
_ = ax.set_ylim([-0.3 , 0.3])
_ = ax.set_title('Daily Returns', fontsize=14)
_ = ax.legend(Stock_Dic.values(), loc='upper center', bbox_to_anchor=(1.2, 0.9), shadow=True, ncol=1, fontsize=12)

The following graphs show the correlation between different stocks.

In [29]:
_ = sns.jointplot('AAPL','MSFT', All_returns, kind='reg', space=0, size=6, ratio=4)
_ = sns.jointplot('AAPL','AMD', All_returns, kind='reg', space=0, size=6, ratio=4)

Now, we can use the pairplot tool to visualize all.

In [30]:
# Remove missing values
Temp = TopN_volumes(8, df = Ave_df)
Temp = All_returns[Temp].dropna()
# plot
_ = sns.pairplot(Temp, diag_kind='kde')

Nonetheless, the correlation matrix and plot are always convenient to see numerical values for correlations.

In [31]:
# Correlation Matrix
Cor_matrix = Temp.corr()
Cor_matrix
Out[31]:
Symbols AMD AAPL MU MSFT INTC CSCO UBER ORCL
Symbols
AMD 1.000000 0.507143 0.472815 0.596956 0.404480 0.380116 0.362763 0.413663
AAPL 0.507143 1.000000 0.554873 0.644031 0.667467 0.562318 0.346737 0.560989
MU 0.472815 0.554873 1.000000 0.524656 0.627446 0.312198 0.262869 0.472465
MSFT 0.596956 0.644031 0.524656 1.000000 0.622182 0.570232 0.393564 0.590636
INTC 0.404480 0.667467 0.627446 0.622182 1.000000 0.499031 0.265160 0.514320
CSCO 0.380116 0.562318 0.312198 0.570232 0.499031 1.000000 0.347379 0.466256
UBER 0.362763 0.346737 0.262869 0.393564 0.265160 0.347379 1.000000 0.260636
ORCL 0.413663 0.560989 0.472465 0.590636 0.514320 0.466256 0.260636 1.000000
In [32]:
def Correlation_Plot (Df,Fig_Size):
    Correlation_Matrix = Df.corr()
    mask = np.zeros_like(Correlation_Matrix)
    mask[np.triu_indices_from(mask)] = True
    for i in range(len(mask)):
        mask[i,i]=0
    Fig, ax = plt.subplots(figsize=(Fig_Size,Fig_Size))
    sns.heatmap(Correlation_Matrix, ax=ax, mask=mask, annot=True, square=True, 
                cmap =sns.color_palette("RdBu", n_colors=10), linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": .5})
    bottom, top = ax.get_ylim()
    _ = ax.set_ylim(bottom + 0.5, top - 0.5)
In [33]:
Correlation_Plot (Temp, 8)

Here, darker shades of blue represent a higher correlation.

In [34]:
def Risk_Plot(data):
    fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 6))
    _ = ax.scatter(data.mean(), data.std(), s=25, color = "#e74c3c")
    _ = ax.set_xlabel('Expected Return')
    _ = ax.set_ylabel('Risk')
    for label,x,y in zip(data.columns, data.mean(), data.std()):
        plt.annotate(label, xy=(x,y), xytext=(-50,0), textcoords = 'offset points',
                     ha = 'right', va = 'bottom', arrowprops=dict(facecolor="#9b59b6", shrink=0.001))
    plt.autoscale(enable=True, axis='both', tight=True)
    return ax.get_xlim()
In [35]:
Temp = Risk_Plot(All_returns)
In [36]:
print("""The current trend seems to output a value between %.2e and %.2e.
We would like to identify a stock with high return and low risk!
""" % Temp)
The current trend seems to output a value between -4.75e-03 and 5.06e-03.
We would like to identify a stock with high return and low risk!

Let's find the quantile for a stock.

In [37]:
qt = All_returns['AMD'].quantile(0.05)
qt_pct = abs(All_returns['AMD'].quantile(0.05))*100
print('Quantile Percentage: %0.4f' % qt_pct)
Quantile Percentage: 5.2468
In [38]:
print("""The 0.05 empirical quantile of daily returns is at {0:.2f}%.
      This means that with 95% confidence, the worst daily loss will not exceed {0:.2f}% (of the investment)."""
      .format(qt,qt_pct))
The 0.05 empirical quantile of daily returns is at -0.05%.
      This means that with 95% confidence, the worst daily loss will not exceed -0.05% (of the investment).

Predicting Future Behaviors

To predict future behaviors, we can implement the Monte Carlo method (also see this link and this link).

In [39]:
# consider a year
days = 365
# Delta t
dt = 1/365

Defining a Monte Carlo function fo the Stock price.

In [40]:
def stock_monte_carlo(start_price, days, mu, sigma):
    '''Function takes in stock price, number of days to run, mean and standard deviation values'''
    price = np.zeros(days)
    price[0] = start_price
    
    shock = np.zeros(days)
    drift = np.zeros(days)
    
    for x in range(1,days):       
        #Shock and drift formulas taken from the Monte Carlo formula
        shock[x] = np.random.normal(loc=mu*dt,scale=sigma*np.sqrt(dt))        
        drift[x] = mu * dt
        #New price = Old price + Old price*(shock+drift)
        price[x] = price[x-1] + (price[x-1] * (drift[x]+shock[x]))
    return price
In [41]:
def Monte_Carlo_Analysis(Inp, mu, sigma, N=1e2, days = days):
    # get the data for Inp teach
    df = Disp_Data(Inp)
    # set the last entry of the open column as the starting price
    start_price = df['Open'][-1]
    # Ouput Figure
    N = int(N)
    fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 8))
    for run in range(100):
        _ = plt.plot(stock_monte_carlo(start_price, days, mu, sigma))
        _ = ax.set_xlabel('Days')
        _ = ax.set_ylabel('Price')
        _ = ax.set_title('Monte Carlo Analysis for %s' % Stock_Dic[Inp])
    _ = ax.set_xlim([0,days])
    return df
In [42]:
def Final_price_distribution_simulations(Inp, mu, sigma, N= 1e4, days = days):
    # get the data for Inp teach
    df = Disp_Data(Inp)
    # set the last entry of the open column as the starting price
    start_price = df['Open'][-1]
    # Simulations array
    N = int(N)
    simulations = np.zeros(N)
    # Progress Bar    
    Counter = 0
    Progress_Bar = progressbar.ProgressBar(maxval= N, widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
    Progress_Bar.start()

    for i in range(N):
        simulations[i] = stock_monte_carlo(start_price, days, mu, sigma)[days-1]
        Counter+=1
        Progress_Bar.update(Counter)
    Progress_Bar.finish()
    return simulations
In [43]:
def Final_price_distribution_plot(simulations, Inp):
    # get the data for Inp teach
    df = Disp_Data(Inp)
    # set the last entry of the open column as the starting price
    start_price = df['Open'][-1]
    # Output Figure
    fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize=(16, 8))
    q = np.percentile(simulations, 1)
    _ = ax.hist(simulations, bins='auto', color = '#34495e')
    _ = plt.figtext(0.75, 0.80, "Start price: $%.2f" % start_price, fontsize = 12)
    _ = plt.figtext(0.75, 0.75, "Mean final price: $%.2f" % simulations.mean(), fontsize = 12)
    _ = plt.figtext(0.75, 0.70, "VaR(0.99): $%.2f" % (start_price -q,), fontsize = 12)
    _ = plt.figtext(0.15,0.665, "q(0.99): $%.2f" % q, fontsize = 12)
    _ = ax.set_xlim()
    _ = ax.axvline(x=q, linewidth=4, color='#e74c3c')
    _ = ax.set_title("Final price distribution for %s after %s days" % (Stock_Dic[Inp], days), weight='bold')

Advanced Micro Devices, Inc.

In [44]:
Stock = 'AMD'
# mean
mu = All_returns.mean()[Stock]
# standard deviation
sigma = All_returns.std()[Stock]
# Analysis
Monte_Carlo_Analysis(Stock, mu = mu, sigma = sigma)
Out[44]:
High Low Open Close Volume Adj Close Moving Ave. 10 days Moving Ave. 20 days Moving Ave. 60 days
Date
2014-01-02 3.980000 3.840000 3.850000 3.950000 20548400.0 3.950000 NaN NaN NaN
2014-01-03 4.000000 3.880000 3.980000 4.000000 22887200.0 4.000000 NaN NaN NaN
2014-01-06 4.180000 3.990000 4.010000 4.130000 42398300.0 4.130000 NaN NaN NaN
2014-01-07 4.250000 4.110000 4.190000 4.180000 42932100.0 4.180000 NaN NaN NaN
2014-01-08 4.260000 4.140000 4.230000 4.180000 30678700.0 4.180000 NaN NaN NaN
... ... ... ... ... ... ... ... ... ...
2019-11-22 39.889999 38.189999 39.360001 39.150002 56931900.0 39.150002 38.827000 36.894000 32.435000
2019-11-25 40.169998 39.490002 39.500000 39.790001 45769500.0 39.790001 39.175000 37.199000 32.574000
2019-11-26 39.480000 38.810001 39.299999 38.990002 43603300.0 38.990002 39.403001 37.497001 32.708833
2019-11-27 39.759998 39.070000 39.459999 39.410000 33630100.0 39.410000 39.592001 37.811000 32.849833
2019-11-29 39.510101 39.020000 39.099998 39.150002 17242025.0 39.150002 39.672001 38.072001 32.977333

1489 rows × 9 columns

The frequencies of different outcomes simulated form a Bell curve. The most likely return is in the middle of the curve. This means there is an equal chance that the actual return will be higher or lower than that value.

In [45]:
Simulations = Final_price_distribution_simulations(Stock,  mu = mu, sigma = sigma)
|#########################################################################|100%
In [46]:
Final_price_distribution_plot(Simulations, Stock)

See more details about Value at Risk (VaR) here.

Apple Inc.

In [47]:
Stock = 'AAPL'
# mean
mu = All_returns.mean()[Stock]
# standard deviation
sigma = All_returns.std()[Stock]
# Analysis
Monte_Carlo_Analysis(Stock, mu = mu, sigma = sigma)
Out[47]:
High Low Open Close Volume Adj Close Moving Ave. 10 days Moving Ave. 20 days Moving Ave. 60 days
Date
2014-01-02 79.575714 78.860001 79.382858 79.018570 58671200.0 71.107201 NaN NaN NaN
2014-01-03 79.099998 77.204285 78.980003 77.282860 98116900.0 69.545288 NaN NaN NaN
2014-01-06 78.114288 76.228569 76.778572 77.704285 103152700.0 69.924515 NaN NaN NaN
2014-01-07 77.994286 76.845711 77.760002 77.148575 79302300.0 69.424438 NaN NaN NaN
2014-01-08 77.937141 76.955711 76.972855 77.637146 64632400.0 69.864105 NaN NaN NaN
... ... ... ... ... ... ... ... ... ...
2019-11-22 263.179993 260.839996 262.589996 261.779999 16331300.0 261.779999 263.740005 258.149870 235.532337
2019-11-25 266.440002 262.519989 262.709991 266.369995 21005100.0 266.369995 264.157004 259.053143 236.503250
2019-11-26 267.160004 262.500000 266.940002 264.290009 26301900.0 264.290009 264.390005 260.139556 237.490012
2019-11-27 267.980011 265.309998 265.579987 267.839996 16308900.0 267.839996 264.727005 261.404964 238.477948
2019-11-29 267.989990 265.899994 266.600006 267.250000 10584652.0 267.250000 265.188004 262.366695 239.388088

1489 rows × 9 columns

In [48]:
Simulations = Final_price_distribution_simulations(Stock,  mu = mu, sigma = sigma)
|#########################################################################|100%
In [49]:
Final_price_distribution_plot(Simulations, Stock)

Microsoft Corporation

In [50]:
Tech = 'MSFT'
# mean
mu = All_returns.mean()[Stock]
# standard deviation
sigma = All_returns.std()[Stock]
# Analysis
Monte_Carlo_Analysis(Stock, mu = mu, sigma = sigma)
Out[50]:
High Low Open Close Volume Adj Close Moving Ave. 10 days Moving Ave. 20 days Moving Ave. 60 days
Date
2014-01-02 79.575714 78.860001 79.382858 79.018570 58671200.0 71.107201 NaN NaN NaN
2014-01-03 79.099998 77.204285 78.980003 77.282860 98116900.0 69.545288 NaN NaN NaN
2014-01-06 78.114288 76.228569 76.778572 77.704285 103152700.0 69.924515 NaN NaN NaN
2014-01-07 77.994286 76.845711 77.760002 77.148575 79302300.0 69.424438 NaN NaN NaN
2014-01-08 77.937141 76.955711 76.972855 77.637146 64632400.0 69.864105 NaN NaN NaN
... ... ... ... ... ... ... ... ... ...
2019-11-22 263.179993 260.839996 262.589996 261.779999 16331300.0 261.779999 263.740005 258.149870 235.532337
2019-11-25 266.440002 262.519989 262.709991 266.369995 21005100.0 266.369995 264.157004 259.053143 236.503250
2019-11-26 267.160004 262.500000 266.940002 264.290009 26301900.0 264.290009 264.390005 260.139556 237.490012
2019-11-27 267.980011 265.309998 265.579987 267.839996 16308900.0 267.839996 264.727005 261.404964 238.477948
2019-11-29 267.989990 265.899994 266.600006 267.250000 10584652.0 267.250000 265.188004 262.366695 239.388088

1489 rows × 9 columns

In [51]:
Simulations = Final_price_distribution_simulations(Stock,  mu = mu, sigma = sigma)
|#########################################################################|100%
In [52]:
Final_price_distribution_plot(Simulations, Stock)